

 ******************************************************************************
 *																			  *
 *							---	Appendix Output ---							  *
 *																			  *
 ******************************************************************************
 


 ******************************************************************************
 *																			  *
 *							---	Appendix Figures ---						  *
 *																			  *
 ******************************************************************************
 
 

 *----------------------------------------------------------------------------*
 *		--- Figure B.1 Mandatory Features of a Candidate Profile, at the Time *
 *					   of the Study	---		  								  *
 *----------------------------------------------------------------------------*
 
 * Example of candidate profile / not data based

 
 *----------------------------------------------------------------------------*
 *		--- Figure B.2 Typical Interview Request Message Sent by a Company    *
 *						to a Candidate, at the Time of the Study ---		  *
 *----------------------------------------------------------------------------*

 * Example of interview request / not data based
 
 
 *----------------------------------------------------------------------------*
 *		--- Figure B.3 Interview Request Rejection Reason as a Function of    *
 * 					   the Bid to Ask Ratio		  							  *
 *----------------------------------------------------------------------------*

{
* Figure B.3: share of interview-request rejections that are justified by
* "bid too low", by bin of the bid-to-ask salary ratio and by gender.

* Load Data Set

$opendata
$dropvars
$timerestrict

* 1. Create relevant variables
	* Bid-to-ask ratio, a flag for bids exactly equal to the ask, and a binned
	* version of the ratio (ratio == 1 is kept as its own bin)
	gen ratio_d_s_salary = d_salary / s_salary 
	gen ratiods = ratio_d_s_salary == 1
	recode ratio_d_s_salary (0.59/0.65 = 0.65) (0.65/0.7 = 0.7) (0.7/0.75 = 0.75) ///
							(0.75/0.8 = 0.8)  (0.8/0.85 = 0.85) (0.85/0.9 = 0.9)  ///
							(0.9/0.95 = 0.95) (0.95/0.999999 = 0.99) (1 = 1)  	  ///
							(1/1.05 = 1.05)   (1.05/1.1 = 1.1)  (1.1/1.15 = 1.15) ///
							(1.15/1.2 = 1.2)  (1.2/1.25 = 1.25) (1.25/1.3 = 1.3)  ///
							(1.3/1.35 = 1.35) (1.35/1.41 = 1.4), gen(auto_bin)


	* Re-classify the rejection reasons in 2 categories. Every other reason code
	* stays missing and therefore does not enter the shares computed below.
		*--- The company / individual fit ones (coded 0):
		gen s_rejection_2 = 0 if inlist(s_rejection, 2, 6, 7, 8, 12, 13, 15, 16, 17, 22, ///
										24, 26, 27, 30, 31, 40, 41, 44, 45)

		*--- Money related reasons (coded 1)
		replace s_rejection_2 = 1 if s_rejection == 20 
		drop s_rejection
		ren s_rejection_2 s_rejection

		*--- Label the rejection reasons
		label define comp 0 "fit" 1 "salary" 
		label values s_rejection comp


* 2. Restrict Sample to the first five bids received to ensure that the candidate is 
* active and available for interviews on the platform at the time he or she receives the request
	keep if s_job_position <= 5


* 3. Create Figure
	* Pooled share of salary-related rejections. It is computed on the request-level
	* sample, i.e. BEFORE collapsing: the mean of the collapsed cell means would weight
	* every (bin x gender) cell equally and would not be the share "of all the
	* rejections" that the annotation on the graph reports.
	sum s_rejection
	local mean = r(mean)
	local display: di %5.1f r(mean)*100

	* One observation per (ratio bin x gender) cell holding the cell share
	collapse s_rejection, by (auto_bin ratiods female)

	label var auto_bin "Ratio of bid to ask salary"
	label var s_rejection `"Share of rejections justified by "bid too low" "'

	* Interview request rejection reason as a function of the Bid to Ask ratio
	twoway (scatter s_rejection auto_bin if female == 0, m(Oh) mcolor(navy)) 	///
		   (scatter s_rejection auto_bin if female == 1, m(Sh) mcolor(maroon)),  	///
		   ylabel( 0 "0%" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%") ///
		   yscale(r(0(0.1)0.6)) ///
		   xlabel(0.6 "0.6" 0.8 "0.8" 1 "1" 1.2 "1.2" 1.4 "1.4") ///
		   yline(`mean' , lp(dash) lcolor(black))  ///
		   legend(ring(0) pos(2) col(1) region(lstyle(none)) ///
		   label(1 "Male") label(2 "Female")) ///
		   text(0.18 0.6 "`display'% of all the rejections" `"are justified by "bid too low""', place(e) size(small))  ///
		   graphregion(color(white)) bgcolor(white)
	graph export "$graphs/figureB3.pdf" , replace 

}
 
 
 *----------------------------------------------------------------------------*
 *		--- Figure B.4 Kernel density of ask and bid salaries ---			  *
 *----------------------------------------------------------------------------*

{
* Figure B.4: kernel densities of ask salaries (panel a) and bid salaries (panel b),
* drawn separately for men and women with one vertical reference line per gender.

* Load Data Set

$opendata
$dropvars
$timerestrict

foreach panel in a b {

	* Panel-specific inputs: salary variable, suffix of the generated density
	* variables, position of the two reference lines, and the x-axis wording
	if "`panel'" == "a" {
		* Panel a) Kernel density of ask salaries
		local salvar  s_salary
		local sfx     4
		local line_f  115672
		local line_m  122598
		local what    Ask
	}
	else {
		* Panel b) Kernel density of bid salaries
		local salvar  d_salary
		local sfx     1
		local line_f  115784
		local line_m  121228
		local what    Bid
	}

	* Density estimates by gender, stored as variables instead of being plotted
	kdensity `salvar' if female == 0, generate(evalm`sfx' densm`sfx') bwidth(3000) nograph
	kdensity `salvar' if female == 1, generate(evalf`sfx' densf`sfx') bwidth(3000) nograph

	twoway (connected densm`sfx' evalm`sfx', msymbol(i) lwidth(medium) lcolor(navy)) ///
		   (connected densf`sfx' evalf`sfx', msymbol(i) lwidth(medium) lcolor(maroon) lpattern(dash)), ///
		   ytitle("Density (10^-6)") ///
		   ylabel(0 "0" 0.000005 "5" 0.00001 "10" 0.000015 "15") ///
		   xline(`line_f', lpattern(dash) lcolor(maroon)) ///
		   xline(`line_m', lcolor(navy) lpattern(solid)) ///
		   xlabel(50000 "50" 75000 "75" 100000 "100" 125000 "125" 150000 "150" 175000 "175" ///
				  200000 "200" 225000 "225" 250000 "250" 275000 "275" 300000 "300") ///
		   xscale(range(50000 300000)) ///
		   xtitle("`what' salary (in thousand $)") ///
		   legend(ring(0) position(2) cols(1) label(1 "Male") label(2 "Female") ///
				  label(3 " ") label(4 " ") order(1 3 2 4) region(lstyle(none)) ///
				  symxsize(8) keygap(1) textwidth(25)) ///
		   graphregion(color(white))
	graph export "$graphs/figureB4`panel'.pdf", replace
}

} 
 
 
 *----------------------------------------------------------------------------*
 *		--- Figure B.5 Ask Feature Change on the Platform ---				  *
 *----------------------------------------------------------------------------*

 * Example of salary elicitation / not data based
  

 *----------------------------------------------------------------------------*
 *		--- Figure B.6 Binned scatter plot of the number of bids received	  *
 *					   as a function of the residual log ask salary ---		  *
 *----------------------------------------------------------------------------*

{
* Figure B.6: binned scatter plot of the number of bids received in a spell
* against the residual log ask salary.

* Load Data Set

$opendata
$dropvars
$timerestrict

* One observation per spell (batchid): first value of the candidate variables and
* the total of d_sent over the spell.
* Fix: the varlist previously read "$ccontrols$" with a stray trailing dollar sign;
* it now matches the other collapse statements in this file.
sort batchid batch_start_date
collapse (first) sid female logs_salary $fcontrols $ccontrols $preferences smonthyear ///
		 (sum) d_sent , by(batchid)

ren d_sent nb_d_received

* Residualize the log ask salary on the controls
reg logs_salary $controls, r
predict resid, resid

binscatter ( nb_d_received resid ), n(60)  ///
linetype(none) xtitle("Residual Log Ask salary", size(large)) scale(.8) /// 
ytitle("Nb of bids received", size(large)) xlabel(-1(0.5)1,labsize(large)) ///
 ylabel(1(1)4,labsize(large))
graph export "$graphs/figureB6.pdf" ,replace 

	
}

 
 *----------------------------------------------------------------------------*
 *		--- Figure C.1 Sorted Effects of the Gender Ask Gap ---				  *
 *----------------------------------------------------------------------------*
 
 *	Created in R, see code heterogeneity.R
 
 
 *----------------------------------------------------------------------------*
 *		--- Figure G.1  Binned scatter plot of firm productivity			  *
 *						as a function of the residual log ask salary		  *
 *----------------------------------------------------------------------------*
 

 


 {
 /*
 Figure G.1 relies on estimates produced in a separate paper that also uses
 Hired.com data:
 'Bidding for Talent: Equilibrium Wage Dispersion on a High-Wage Online Job Board'
 (with Benjamin Scuderi). The data used there are proprietary as well.
 */

* Binned scatter of the firm productivity measure against the residual log ask salary
use "$indata/prod_ask.dta", clear
binscatter ep_mc0_z ask_r, ///
	xtitle("Residual Log Ask Salary") ///
	ytitle("Firm Comp. of Productivity (Normalized)")
graph export "$graphs/prod_ask.pdf", replace

 }
 
 


 *----------------------------------------------------------------------------*
 *		--- Figure H.1 Candidate's Target Firm Choice - Two Firm Example ---  *
 *----------------------------------------------------------------------------*

 *	Externally created / not data based
 
 
 *----------------------------------------------------------------------------*
 *		--- Figure H.2 Empirical Distribution of Firm Types	---	 			  *
 *----------------------------------------------------------------------------*

 {	
 	
* Figure H.2: for each residual ask level, the share of jobs whose range of
* residual asks (lowest to highest ask they sent a request to) contains that level.
* The jobs are processed in chunks of 100 and the chunk results are accumulated
* in bellshape.dta before the final binned scatter plot.

* Load Data Set

$opendata
$dropvars
$timerestrict

 	** Re-number the individual*spell id (batchid) consecutively from 1
	egen newbatchid = group(batchid)
	replace batchid = newbatchid
	drop newbatchid
	sum batchid 
	local max_batchid = r(max)

	** Residualize the log ask on the controls, using one observation per spell
	** (the first by batch_start_date). The residual ask per batchid is saved in a
	** tempfile; it is merged back here and again inside the loop below.
	preserve
	bys batchid (batch_start_date): keep if _n == 1
	reg logs_salary $controls
	predict residask, resid
	keep batchid residask
	tempfile predict 
	save `predict'
	restore
	merge m:1 batchid using `predict'
	* Only keep companies with at least 20 observations (requests) in the data
	drop if mi(companyid)
	bys companyid : gen nbsent = _N
	keep if nbsent >= 20
	* Isolate the range of residual asks for each job (min and max within jobid)
	drop if mi(jobid)
	bys jobid: egen minask = min(residask)
	bys jobid: egen maxask = max(residask)
	** Re-number the job ID consecutively from 1; k is the number of jobs kept
	egen newjobid = group(jobid)
	replace jobid = newjobid
	drop newjobid
	sum jobid 
	local k = r(max)
	display `k'

	keep batchid jobid minask maxask  

	* Loop through 100er groups of jobs (j is the first job id of each group)
	forvalues j=1(100)`k' {
	
		preserve
		* Keep the current group of up to 100 jobs and re-number them from 1
		keep if jobid >= `j' & jobid < `j' + 100
		egen newjobid = group(jobid)
		replace jobid = newjobid
		drop if mi(jobid)
		drop newjobid
		* For each batchid, give me the min and max ask from all jobs in this batch 
		* (one pair of minask / maxask columns per job after the reshape)
		reshape wide minask maxask , i(batchid) j(jobid)					
		* Make sure every batchid from 1 to max_batchid has a row, so that the merge
		* below brings in the residual ask of every spell
		sort batchid
		tsset  batchid
		tsfill						// filling in all other batchids until the maximum within sample
		set obs `max_batchid'		// filling in all other batchids all remaining batchids 
		replace batchid = _n if mi(batchid)

		* Merge in the predicted log ask per batchid 
		merge m:1 batchid using `predict'
		gen one = 1

		* Fill each column with the value within this column 
		* NOTE(review): xfill is a user-written command; presumably it copies the
		* non-missing value of each minask / maxask column to all rows within the
		* constant group "one" - confirm against its help file
		xtset, clear
		xfill minask*, i(one)
		xfill maxask*, i(one)

		* How many of the 100 bidranges cover this ask?
		* capture: the last group can hold fewer than 100 jobs, so some of the
		* minask / maxask columns do not exist and the replace is skipped for them
		gen count = 0
		forvalues i=1(1)100 {
		cap replace count = count + 1 if minask`i' < residask &  maxask`i' > residask
		}
	
		* Select the number of bidranges for each ask level within this batch 
		collapse (first) count, by(residask)
		tempfile reshaped`j'
		save `reshaped`j''
		* The first group initialises bellshape.dta ...
		if `j' == 1 {
		save "$data/bellshape.dta", replace
		}
		restore
		
		* ... and every later group is appended to it
		preserve
		if `j' != 1 {
		use "$data/bellshape.dta", clear
		append using `reshaped`j''
		save "$data/bellshape.dta", replace
		}
		restore
	}

	* Data tells me for how many jobs a specific ask salary falls within the bid range for this job.
	* (the per-group counts are summed over all groups of jobs)
	use "$data/bellshape.dta", clear
	collapse (sum) count, by(residask)

	* NOTE(review): 39839 is hard-coded as the number of jobs; presumably it should
	* equal the local k displayed above - confirm whenever the sample changes
	gen probafirm = count/39839		//number of jobs

	binscatter probafirm residask,  nq(60) line(none) m(C) scale(.6) ///
			   ylabel(,labsize(huge)) xtitle("Residual Ask salary", size(huge)) ///
			   xscale(r(-1,1)) xlabel(-1(.5)1,labsize(huge)) ///
			   ytitle("Firms' Pr(min ask <= x & max ask > x)", size(huge)) 
	graph export "$graphs/figureH2.pdf" , replace 
 }

 ******************************************************************************
 *																			  *
 *							---	Appendix Tables ---						  	  *
 *																			  *
 ******************************************************************************
 

 
 *----------------------------------------------------------------------------*
 *		--- Table A.1 Fields on a Candidate's Profile and Other Variables 	  *
 *				 	  Used as Controls ---									  *
 *----------------------------------------------------------------------------*
 
 *	Direct entry / not data based 

 
 *----------------------------------------------------------------------------*
 *		--- Table A.2 Relationship Between Gender and Expressed Preferences	  *
 * 					  over Firm Characteristics ---							  *
 *----------------------------------------------------------------------------* 
 
{
* Table A.2: regressions of expressed preferences over firm characteristics
* (no preference at all, company size, industry, career goal) on gender.

* Load Data Set

$opendata
$dropvars
$timerestrict


* Dummy for "no preference expressed": starts from a missing or "no preference"
* company size and is switched off as soon as any other wish field is filled in
gen nopref = mi(wish_CompanySize) | wish_CompanySize == "no preference"
foreach wish in wish_CareerGoal wish_CareerPath wish_Industry wish_NormalizedSkill {
	replace nopref = 0 if `wish' != "no preference" & !mi(`wish')
}
tab nopref

* One observation per candidate
collapse (first) female nopref $fcontrols $ccontrols $preferences logs_salary smonthyear, by(sid)

* Column 1: expressing no preference (m_mean is the mean among men)
quietly summarize nopref if female == 0
scalar m_mean = r(mean)
regress nopref i.female $controls, vce(robust)
estimates store nopref
	estadd scalar m_mean 
	estadd local hascontrols "X" 
	estadd local timefe "X"

* Outcomes of each preference family, and the OTHER preference families that are
* used as controls when an outcome of that family is the dependent variable
local fam_size      csize1_15 csize16_50 csize51_200 csize201_500 csize500
local fam_goal      cgoal_tech cgoal_lead cgoal_mentor cgoal_lsoc
local fam_industry  wishind_hardware wishind_bank wishind_education wishind_health

local oth_size      i.(wishind_*) i.(cpath_*) i.(cgoal_*) i.(wishskill_*)
local oth_goal      i.(wishind_*) i.(cpath_*) i.(csize*) i.(wishskill_*)
local oth_industry  i.(cgoal_*) i.(cpath_*) i.(csize*) i.(wishskill_*)

* Estimates are stored as size_1 ... size_5, goal_1 ... goal_4, industry_1 ... industry_4
foreach fam in size goal industry {

	local idx = 0

	foreach depvar of local fam_`fam' {

		local ++idx

		quietly summarize `depvar' if female == 0
		scalar m_mean = r(mean)

		regress `depvar' i.female $ccontrols i.(${fcontrols}) i.smonthyear `oth_`fam'', vce(robust)
		estimates store `fam'_`idx'
			estadd scalar m_mean 
			estadd local hascontrols "X" 
			estadd local timefe "X"
	}
}


esttab nopref size_1 size_2 size_3 size_4 size_5 ///
       industry_1 industry_2 industry_3 industry_4 ///
	   goal_1 goal_2 goal_3 goal_4 ///
	   using "$tables/tableA2.tex", $tableprefs	


}
 
 
 *----------------------------------------------------------------------------*
 *		--- Table A.3 The Last Ask Salary as a Function of Gender 			  *
 *					  and Resume Characteristics ---						  *
 *----------------------------------------------------------------------------* 
 
{ 
* Table A.3: the last ask salary as a function of gender, adding resume
* characteristics block by block. Columns 1-6 use one observation per candidate
* (last spell); column 7 is estimated at the bid level.
	
* Load Data Set

$opendata
$dropvars
$timerestrict

* Blocks of controls added cumulatively across columns. Defining them once avoids
* the duplicated experience terms that the column 4 regression used to list twice.
local x_resume i.s_primary_field_exp s_total_exp s_total_exp2 i.s_choice_location ///
			   i.s_current_location i.s_primary_field i.nbpastbatch i.batch_length
local x_educ   i.cat_degree i.csdegree i.ivyplus s_grad_year i.ranking
local x_pref   i.s_contract i.s_search_status i.s_sponsorship i.(${preferences})

preserve
sort sid batch_start_date

** collapse at the sid level (last value per candidate)
collapse (last) female logs_salary $fcontrols $ccontrols $preferences smonthyear  company_name, by(sid)

* (1) raw gap + time FE
reg logs_salary i.female i.smonthyear , r

	estimates store OLS0_long1
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe fieldfe locationfe educationfe preferencefe employmentfe firmfe {
		estadd local `flag' ""
	}
	estadd local monthfe "X"
	
* (2) raw gap + experience + location + field
reg logs_salary i.female i.smonthyear `x_resume', r
	
	estimates store OLS0_long2
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe locationfe fieldfe monthfe {
		estadd local `flag' "X"
	}
	foreach flag in educationfe preferencefe employmentfe firmfe {
		estadd local `flag' ""
	}
	
* (3) raw gap + experience + location + field + education 
* NOTE(review): the "external" flag is switched on from this column onwards, as in
* the original code - confirm that this is what the table layout expects
reg logs_salary i.female i.smonthyear `x_resume' `x_educ', r
	
	estimates store OLS0_long3
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe locationfe fieldfe external educationfe monthfe {
		estadd local `flag' "X"
	}
	foreach flag in preferencefe employmentfe firmfe {
		estadd local `flag' ""
	}
	
* (4) raw gap + experience + location + field + education + preferences
reg logs_salary i.female i.smonthyear `x_resume' `x_educ' `x_pref', r
	
	estimates store OLS0_long4
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe locationfe fieldfe external educationfe preferencefe monthfe {
		estadd local `flag' "X"
	}
	foreach flag in employmentfe firmfe {
		estadd local `flag' ""
	}

* (5) raw gap + experience + location + field + external websites + education + preferences + employment 
reg logs_salary i.female $controls, r
	estimates store OLS0_long5
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe locationfe fieldfe external educationfe preferencefe employmentfe monthfe {
		estadd local `flag' "X"
	}
	estadd local firmfe ""


* (6) Most recent company FE; the adjusted R2 reported is the within one
encode company_name, gen(firm_name)
xtset firm_name

reghdfe logs_salary i.female $controls, absorb(firm_name) vce(robust)
	estimates store OLS0_firmFE
	estadd scalar r2_adj = e(r2_a_within)
	foreach flag in experiencefe locationfe fieldfe external educationfe preferencefe employmentfe monthfe firmfe {
		estadd local `flag' "X"
	}

restore

* (7) Regression at the bid level (rows with a bid salary), clustered by candidate

reg logs_salary i.female $controls if !mi(d_salary), vce(cluster sid)
	
	estimates store OLS0_alt
	estadd scalar r2_adj = e(r2_a)
	foreach flag in experiencefe locationfe fieldfe external educationfe preferencefe employmentfe monthfe {
		estadd local `flag' "X"
	}
	estadd local firmfe ""
 

*Exporting Long Ask Table
esttab OLS0_long1 OLS0_long2 OLS0_long3 OLS0_long4 OLS0_long5 OLS0_firmFE OLS0_alt ///
	   using "$tables/tableA3.tex", $tableask
	  
}


 *----------------------------------------------------------------------------*
 *		--- Table A.4 Ask Gap Corrected for Unobservables following 		  * 
 *				 	  Altonji, Elder, and Taber (2005) ---					  *
 *----------------------------------------------------------------------------* 

{
* Table A.4: raw and controlled gender ask gap, plus the psacalc output that
* corrects for unobservables (rmax = 1, delta = 1).

* Load Data Set

$opendata
$dropvars
$timerestrict

* One observation per candidate
sort sid batch_start_date
collapse (first) female logs_salary $fcontrols $ccontrols $preferences smonthyear, by(sid)

* Results row: baseline (coef, se, R2), controlled (coef, se, R2),
* identified set (controlled coef, psacalc beta) and R max
matrix C = J(1, 9, .)

* Baseline effect: female only (no month x year FE, for the lower bound)
regress logs_salary 1.female, vce(robust)
matrix C[1,1] = _b[1.female]
matrix C[1,2] = _se[1.female]
matrix C[1,3] = e(r2)

* Controlled effect; the coefficient is stored a second time in column 7 as one
* end of the identified set
regress logs_salary i.female $controls, vce(robust)
matrix C[1,4] = _b[1.female]
matrix C[1,5] = _se[1.female]
matrix C[1,6] = e(r2)
matrix C[1,7] = _b[1.female]

* AET calculation on the controlled regression
psacalc beta 1.female, rmax(1) delta(1) beta(0)
matrix C[1,8] = r(beta)
matrix C[1,9] = r(rmax)

frmttable using "$tables/tableA4.tex", tex statmat(C) varlabels ///
	fragment sdec(3,3,3,3,3,3,3,3,0) replace ///
	ctitle("\textbf{Treatment Variable:} & \multicolumn{3}{c}{\textbf{Baseline Effect}} & \multicolumn{3}{c}{\textbf{Controlled Effect}} & \multicolumn{2}{c}{\textbf{Identified Set}} & \textbf{R max} \\ \cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10}", "Coefficient", "(Std. Error)" , "[R-sqrd]","Coefficient","(Std. Error)","[R-sqrd]", "for $\beta=0$,", "$\Tilde{\delta}=1$", "") ///
	rtitles("\textbf{Female}") 

}


 *----------------------------------------------------------------------------*
 *		--- Table A.5 The Role of the Ask Salary and Resume Characteristics   *
 *					  in Bid Salary Gender Differences Including Equity ---	  *
 *----------------------------------------------------------------------------*  
 
{
* Table A.5: bid salary gender differences, controlling for equity in every column.
* Two-way clustered (job and candidate) regressions estimated with ivreg2.

* Load Data Set

$opendata
$dropvars
$timerestrict

* Center the log ask salary at its sample mean
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

* Bid salary on gender, then adding resume characteristics, the ask salary, both,
* and the gender x ask interaction. These columns do NOT absorb firm fixed effects.
local rhs1 i.female i.smonthyear
local rhs2 i.female $controls
local rhs3 i.female centered_logask i.smonthyear
local rhs4 i.female centered_logask $controls
local rhs5 i.female##c.centered_logask $controls

forvalues c = 1/5 {

	* Columns 1 and 3 only carry the time fixed effects, not the resume controls
	local has_controls = cond(inlist(`c', 1, 3), "", "X")

	ivreg2  logd_salary  `rhs`c''  i.equity, cluster(jobid sid)
	estimates store OLS1eq`c'
	* Fix: ivreg2 returns e(r2_a); e(r2_a_within) is a reghdfe result and was
	* always missing here
	estadd scalar r2_adj = e(r2_a)
	estadd local hascontrols "`has_controls'"
	estadd local timefe "X"
	* Fix: no firm fixed effects in this specification, so the flag is left blank
	estadd local firmfe ""
}

esttab  OLS1eq1 OLS1eq2 OLS1eq3 OLS1eq4 OLS1eq5 using "$tables/tableA5.tex", $tablebid 
	   

}


 *----------------------------------------------------------------------------*
 *		--- Table A.6 The Role of the Ask Salary and Resume Characteristics	  *
 *					  in Bid Salary Gender Differences for a Given Firm ---	  *
 *----------------------------------------------------------------------------*  
 
{
* Table A.6: bid salary gender differences for a given firm (company fixed
* effects absorbed with reghdfe, two-way clustering by job and candidate).

* Load Data Set

$opendata
$dropvars
$timerestrict

* Center the log ask salary at its sample mean
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

*Bid salary with firm FE on characteristics, ask salary, and both
local rhs1 i.female i.smonthyear
local rhs2 i.female $controls
local rhs3 i.female centered_logask i.smonthyear
local rhs4 i.female centered_logask $controls
local rhs5 i.female##c.centered_logask $controls

forvalues c = 1/5 {

	* Columns 1 and 3 only carry the time fixed effects, not the resume controls
	local has_controls = cond(inlist(`c', 1, 3), "", "X")

	reghdfe logd_salary  `rhs`c'', absorb(companyid) vce(cluster jobid sid)
	estimates store OLS1fe`c'
	estadd scalar r2_adj = e(r2_a_within)
	estadd local hascontrols "`has_controls'"
	* Fix: columns 1 and 3 include i.smonthyear, so the time FE flag is set in
	* every column, as in the other bid salary tables
	estadd local timefe "X"
	estadd local firmfe "X"
}

esttab  OLS1fe1 OLS1fe2 OLS1fe3 OLS1fe4 OLS1fe5 using "$tables/tableA6.tex", $tablebidfirm 
	   

}


 *----------------------------------------------------------------------------*
 *		--- Table A.7 The Role of the Ask Salary and Resume Characteristics	  *
 * 					  in Bid Salary Gender Differences - Sample Restriction:  *
 *					  Only Keep Bids for Jobs that Lead to a Hire on the 	  *
 *					  Platform ---									 		  *
 *----------------------------------------------------------------------------*   
 
{
* Table A.7: bid salary gender differences, keeping only observations with d_hire == 1.

* Load Data Set

$opendata
$dropvars
$timerestrict

* Center the log ask salary at its mean (taken before the sample restriction)
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'


** bid salary table only on the sample of jobs that actually hire candidates
keep if d_hire == 1 

* Right-hand sides of the five columns
local rhs1 i.female  i.smonthyear
local rhs2 i.female $controls
local rhs3 i.female centered_logask i.smonthyear
local rhs4 i.female centered_logask $controls
local rhs5 i.female##c.centered_logask $controls

forvalues c = 1/5 {

	* Columns 1 and 3 only carry the time fixed effects, not the resume controls
	local has_controls = cond(inlist(`c', 1, 3), "", "X")

	ivreg2 logd_salary `rhs`c'', cluster(jobid sid)
	estimates store OLS1_hire`c'
	estadd local hascontrols "`has_controls'"
	estadd local timefe "X"
}

esttab OLS1_hire1 OLS1_hire2 OLS1_hire3 OLS1_hire4 OLS1_hire5 using "$tables/tableA7.tex", $tablebid 

} 


 *----------------------------------------------------------------------------*
 *		--- Table A.8 The Role of the Ask Salary and Resume Characteristics   *
 *					  in Bid Salary Gender Differences - Sample Restriction:  *
 * 					  Only Keep Bids that are Different from the Candidate's  *
 *					  Ask ---										  		  *
 *----------------------------------------------------------------------------*   
 
{
* Table A.8: bid salary gender differences, dropping the bids equal to the ask.

* Load Data Set

$opendata
$dropvars
$timerestrict

* Center the log ask salary at its mean (taken before the sample restriction)
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

*** bid table removing the instances where bid = ask
keep if s_salary != d_salary

* Right-hand sides of the five columns
local rhs1 i.female i.smonthyear
local rhs2 i.female $controls
local rhs3 i.female centered_logask i.smonthyear
local rhs4 i.female centered_logask $controls
local rhs5 i.female##c.centered_logask $controls

forvalues c = 1/5 {

	* Columns 1 and 3 only carry the time fixed effects, not the resume controls
	local has_controls = cond(inlist(`c', 1, 3), "", "X")

	ivreg2 logd_salary `rhs`c'', cluster(jobid sid)
	estimates store OLS1_sd`c'
	estadd local hascontrols "`has_controls'"
	estadd local timefe "X"
}

esttab OLS1_sd1 OLS1_sd2 OLS1_sd3 OLS1_sd4 OLS1_sd5 using "$tables/tableA8.tex", $tablebid  

} 

  
 *----------------------------------------------------------------------------*  
 *		--- Table A.9 and A.11 Predicted Ask Gap using a Model Fitted on the 	      *
 *					  Pre-Reform Sample ---									  *
 *----------------------------------------------------------------------------*  

{
* Tables A.9 and A.11: ask regressions fitted separately before and after the
* reform, and the gender gap in the ask predicted by the pre-reform model.

* Load Data Set
* no longer restrict to pre-period
$opendata
$dropvars
* SF software engineers
keep if s_primary_field == 4 & s_choice_location == 1

* One observation per candidate
sort sid batch_start_date
collapse (first) female after female_after logs_salary $fcontrols $ccontrols $preferences smonthyear, by(sid)

* Fit the ask model on each period (0 = before, 1 = after). The linear prediction
* is taken from the pre-reform fit and is computed for all candidates.
foreach post in 0 1 {

	local period = cond(`post' == 1, "after", "before")

	regress logs_salary i.female $controls if after == `post', vce(robust)
	if `post' == 0 {
		predict logs_salary_pred, xb
	}
	estimates store logs_salary_`period'
	estadd local hascontrols "X"
}

* Gender gap in the predicted ask, before and after
regress logs_salary_pred i.female after i.female_after, vce(robust)
estimates store logs_salary_pred


esttab  logs_salary_pred  using "$tables/tableA9.tex", $tablepredi1

esttab  logs_salary_before logs_salary_after  using "$tables/tableA11.tex", $tablepredi2

 }
 
 *----------------------------------------------------------------------------*
 *		--- Table A.10 Summary Statistics on Candidates before and after 	  *
 *					   the Reform ---										  *
 *----------------------------------------------------------------------------* 
 
 
{
* Load Data Set
* no longer restrict to pre-period
$opendata
$dropvars
* SF software engineers
keep if s_primary_field == 4 & s_choice_location == 1

mat T = J(13,4,.)

* number of bids
count if after==1 & female==1 & !mi(d_salary)
mat T[1,1] = r(N)
count if after == 0 & female==1 & !mi(d_salary)
mat T[1,2] = r(N)
count if after ==1 & female==0 & !mi(d_salary)
mat T[1,3] = r(N)
count if after == 0 & female==0 & !mi(d_salary)
mat T[1,4] = r(N)

** collapse at the sid level
sort sid batch_start_date
collapse (first) female after female_after logs_salary $fcontrols $ccontrols $preferences smonthyear smonth ///
				 company_name jobid batchid, by(sid)

	
*** main variables differences ***

* number of candidates
count if after ==1 & female==1
mat T[2,1] = r(N)
count if after == 0 & female==1
mat T[2,2] = r(N)
count if after ==1 & female==0
mat T[2,3] = r(N)
count if after == 0 & female==0
mat T[2,4] = r(N)

* experience
sum s_total_exp if after ==1 & female==1
mat T[3,1] = r(mean)
sum s_total_exp if after == 0 & female==1
mat T[3,2] = r(mean)
sum s_total_exp if after ==1 & female==0
mat T[3,3] = r(mean)
sum s_total_exp if after == 0 & female==0
mat T[3,4] = r(mean)

* Share with a bachelor (female)
count if after ==1 & female==1
local temp1 = r(N)
count if after == 0 & female==1
local temp2 = r(N)
count if  cat_degree  >= 2 & after ==1 & female==1
mat T[4,1] =  r(N) / `temp1' *100
count if cat_degree  >=  2 & after == 0 & female==1
mat T[4,2] =  r(N) / `temp2' *100

* Share with a master (female)
count if  cat_degree  >= 3 & after ==1 & female==1
mat T[5,1] =  r(N) / `temp1' *100
count if cat_degree  >=  3 & after == 0 & female==1
mat T[5,2] =  r(N) / `temp2' *100

* Share with a cs degree (female)
sum csd if after ==1 & female==1
mat T[6,1] = r(mean) *100
sum csd if after == 0 & female==1
mat T[6,2] = r(mean) *100

* Share with a bachelor (Male)
count if after ==1 & female==0
local temp1 = r(N)
count if after == 0 & female==0
local temp2 = r(N)
count if  cat_degree  >= 2 & after ==1 & female==0
mat T[4,3] =  r(N) / `temp1' *100
count if after == 0
count if cat_degree  >=  2 & after == 0 & female==0
mat T[4,4] =  r(N) / `temp2' *100
	
* Share with a master (Male)
count if  cat_degree  >= 3 & after ==1 & female==0
mat T[5,3] =  r(N) / `temp1' *100
count if cat_degree  >=  3 & after == 0 & female==0
mat T[5,4] =  r(N) / `temp2' *100
	
* Share with a cs degree
sum csd if after ==1 & female==0
mat T[6,3] = r(mean) *100
sum csd if after == 0 & female==0
mat T[6,4] = r(mean) *100

* Share ivy
sum ivy if after ==1 & female==1
mat T[7,1] = r(mean) *100 
sum ivy if after == 0 & female==1
mat T[7,2] = r(mean) *100 
sum ivy if after ==1 & female==0
mat T[7,3] = r(mean) *100 
sum ivy if after == 0 & female==0
mat T[7,4] = r(mean) *100 

* Job type (full time?)
replace s_contract = 0 if s_contract == 2 | s_contract == 4
replace s_contract = 1 if s_contract == 3
sum s_contract if after ==1 & female==1
mat T[8,1] =  (1 - r(mean)) *100
sum s_contract if after == 0 & female==1
mat T[8,2] = (1- r(mean)) *100
sum s_contract if after ==1 & female==0
mat T[8,3] =  (1 - r(mean)) *100
sum s_contract if after == 0 & female==0
mat T[8,4] = (1- r(mean)) *100

* Share in need of s_sponsorship
sum s_sponsorship if after ==1 & female==1
mat T[9,1] = r(mean) *100
sum s_sponsorship if after == 0 & female==1
mat T[9,2] = r(mean) *100
sum s_sponsorship if after ==1 & female==0
mat T[9,3] = r(mean) *100
sum s_sponsorship if after == 0 & female==0
mat T[9,4] = r(mean) *100

* Share working remote
* The group sizes are recounted here for each gender. The temp1/temp2 locals
* that reach this point were last set in the male bachelor section, so reusing
* them would divide the female remote counts by the male group sizes.
count if after ==1 & female==1
local temp1 = r(N)
count if after == 0 & female==1
local temp2 = r(N)
count if s_remote == 2 & after ==1 & female==1
mat T[10,1] =  r(N) / `temp1' *100
count if s_remote == 2 & after == 0 & female==1
mat T[10,2] =  r(N) / `temp2' *100
count if after ==1 & female==0
local temp1 = r(N)
count if after == 0 & female==0
local temp2 = r(N)
count if s_remote == 2 & after ==1 & female==0
mat T[10,3] =  r(N) / `temp1' *100
count if s_remote == 2 & after == 0 & female==0
mat T[10,4] =  r(N) / `temp2' *100

* Share employed
sum employed if after ==1 & female==1
mat T[11,1] = r(mean) *100 
sum employed if after == 0 & female==1
mat T[11,2] = r(mean) *100 
sum employed if after ==1 & female==0
mat T[11,3] = r(mean) *100 
sum employed if after == 0 & female==0
mat T[11,4] = r(mean) *100 

* Share FAANG
sum faang if after ==1 & female==1
mat T[12,1] = r(mean) *100 
sum faang if after == 0 & female==1
mat T[12,2] = r(mean) *100 
sum faang if after ==1 & female==0
mat T[12,3] = r(mean) *100 
sum faang if after == 0 & female==0
mat T[12,4] = r(mean) *100 

* Share leading team
count if after ==1 & female==1 
local temp1 = r(N)
count if after == 0 & female==1 
local temp2 = r(N)
count if s_nb_reports  > 0 & after ==1 & female==1 
mat T[13,1] =  r(N) / `temp1' *100
count if s_nb_reports  > 0 & after ==0 & female==1 
mat T[13,2] =  r(N) / `temp2' *100
count if after ==1 & female==0 
local temp1 = r(N)
count if after == 0 & female==0 
local temp2 = r(N)
count if s_nb_reports  > 0 & after ==1 & female==0 
mat T[13,3] =  r(N) / `temp1' *100
count if s_nb_reports  > 0 & after ==0 & female==0 
mat T[13,4] =  r(N) / `temp2' *100


frmttable using "$tables/tableA10.tex", tex statmat(T) varlabels  fragment sdec(1) replace ///
ctitle("Variable", "Female - After", "Female - Before", "Male - After", "Male - Before")  ///
rtitles("Nb. of Bids" \ "Nb. of Candidates" \ "Years of experience" \  "Share with a bachelor" \  "Share with a master" \ "Share with a CS degree" \ "Share with an IvyPlus degree"\ "Share looking for full time job" \ "Share in need of visa sponsorship" \ "Share of remote only workers" \ "Share employed" \ "Share that worked at a FAANG" \ "Share leading a team" )  

}
 
 
 *----------------------------------------------------------------------------*
 *		--- Table A.11 Impact of the Reform on Controls other than Gender 	  *
 *					   in the Ask Gap Estimation ---						  *
 *----------------------------------------------------------------------------* 
 
 * see above in Table A9
 

 *----------------------------------------------------------------------------*
 *		--- Table A.12 The Ask Gap by Share of Women in the Labor Market ---  *
 *----------------------------------------------------------------------------* 

{
* Table A.12: regress the ask gap of each current-location x field cell on the
* centered female share of that cell, weighting cells by their share of
* observations. Output is written to $tables/tableA12.tex.

* Load Data Set
* no longer restrict to pre-period
$opendata
$dropvars
* SF software engineers
keep if s_primary_field == 4 & s_choice_location == 1

* Keep one row per sid x current location x field (first value of each variable)
* can't use the $controls
collapse (first)  	female after female_after logs_salary $ccontrols $preferences s_choice_location ///
					cat_degree csdegree ivyplus s_contract s_primary_field_exp ///
					s_search_status s_sponsorship s_remote employed faang ///
					s_nb_reports smonthyear nbpastbatch batch_length $skills ///
					highest_jobtitle linkedin website ranking, by(sid s_current_location s_primary_field)

					
* Residualise Salary
reg logs_salary     $ccontrols i.s_primary_field_exp i.s_choice_location  ///		
					i.cat_degree i.csdegree i.ivyplus i.s_contract ///
					i.s_search_status i.s_sponsorship i.s_remote i.employed i.faang ///
					i.s_nb_reports i.smonthyear i.nbpastbatch i.batch_length $skills i.(${preferences}) ///
					i.highest_jobtitle i.linkedin i.website i.ranking, r
predict resid, resid



* Create Ask Gap Coefficients
* For each outcome: fit the fully interacted gender x location x field model,
* take the fitted value, remove the constant and flip the sign.

	* (a) Raw Ask Gap
	reg logs_salary i.female#i.s_current_location#i.s_primary_field, r 	
	local constant=_b[_cons]
	predict askgap_coeff
	di `constant'
	replace askgap_coeff = askgap_coeff - `constant'
	replace askgap_coeff = askgap_coeff * (-1)

	* (b) Residual Ask Gap
	reg resid i.female#i.s_current_location#i.s_primary_field, r 
	local constant=_b[_cons]
	predict res_askgap_coeff
	di `constant'
	replace res_askgap_coeff = res_askgap_coeff - `constant'
	replace res_askgap_coeff = res_askgap_coeff * (-1)


* Define Ask Gap as Difference between coefficient of Men and Women with respect to location and job

	* We are only interested in men and women for this
	drop if female == 2
	preserve
	* One row per gender within each location x field cell
	collapse (first) res_askgap_coeff askgap_coeff, by(female s_current_location s_primary_field)
	* Calculate the difference for each comb of s_current_location and s_primary_field.
	* Grouping by both variables keeps the [_n-1] lookup inside one cell, so the
	* female row is only ever compared with the male row of the same cell.
	bysort s_current_location s_primary_field (female) : gen ask_gap = askgap_coeff - askgap_coeff[_n-1]
	replace ask_gap = . if female==0
	bysort s_current_location s_primary_field (female) : gen res_ask_gap = res_askgap_coeff - res_askgap_coeff[_n-1]
	replace res_ask_gap = . if female==0
	tempfile ask_gap
	save `ask_gap'
	restore

merge m:1 s_current_location s_primary_field female using `ask_gap'
					
* Create Share for each combination of location and job

egen job_loc= concat(s_current_location s_primary_field), punct(:)

bysort job_loc: egen share = count(job_loc)
* Divide by the number of observations in memory. No count or summarize in this
* block sets r(N), so dividing by r(N) relied on whatever egen left behind.
replace share = share/_N


* One row per cell: the mean of female is the female share of the cell
collapse (mean) res_ask_gap ask_gap female (first) share job_loc, by(s_current_location s_primary_field) 	

* Create Table for Relationship between Askgap and Share of Female

sum female, meanonly
gen cent_fshare = female - `r(mean)'

reg ask_gap cent_fshare [w=share], r
est sto rawfull

reg res_ask_gap cent_fshare [w=share], r
est sto resfull
	estadd local hascontrols "X"

esttab rawfull resfull using "$tables/tableA12.tex", $rawfull

}


 *----------------------------------------------------------------------------*
 *		--- Table C.1 Classification Analysis - Averages of Characteristics	  *
 *					  of the Women with the Smallest and Largest Ask Gap ---  *
 *----------------------------------------------------------------------------* 

 * Created in R, see code heterogeneity.R
 
 
 *----------------------------------------------------------------------------*
 *		--- Table E.1 The Racial Ask, Bid and Final Salary Gap ---			  *
 *----------------------------------------------------------------------------* 

{	
* Table E.1: gender and race gaps in the log ask salary (candidate level),
* the log bid salary and the log final salary (bid level).
* Output is written to $tables/tableE1.tex.

* Load Data Set

$opendata
$dropvars
$timerestrict
	
* --- Ask salary: one row per candidate, restored to bid level afterwards ---
preserve
sort sid batch_start_date
collapse (first) logs_salary female africanamerican asian hispanic $fcontrols $ccontrols $preferences smonthyear, by(sid)

* Ask gap with month-year fixed effects only
reg logs_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian i.smonthyear, r
estimates store OLS0_long0
estadd local hascontrols ""
estadd local timefe "X"

* Ask gap with the $controls set
reg logs_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian ///
$controls, r
estimates store OLS0_long8
estadd local hascontrols "X"
estadd local timefe "X"
restore

* Bid salary
* Mean-center the log ask so the main effects are read at the average ask
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'

ivreg2 logd_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian  i.smonthyear, cluster(jobid sid)
estimates store OLS11
estadd local hascontrols ""
estadd local timefe "X"

ivreg2 logd_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian $controls, cluster(jobid sid)
estimates store OLS12
estadd local hascontrols "X"
estadd local timefe "X"

* The female x asian interaction previously read ii.female, which is not a
* valid factor-variable operator; it now matches the other specifications.
ivreg2 logd_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian centered_logask ///
i.smonthyear, cluster(jobid sid)
estimates store OLS13
estadd local hascontrols ""
estadd local timefe "X"

ivreg2 logd_salary i.female i.africanamerican i.hispanic i.asian  ///
i.female#i.africanamerican i.female#i.hispanic i.female#i.asian centered_logask ///
$controls, cluster(jobid sid)
estimates store OLS14
estadd local hascontrols "X"
estadd local timefe "X"

* Slopes on the ask allowed to differ by gender and by race
ivreg2 logd_salary i.female i.africanamerican i.hispanic i.asian centered_logask ///
i.female#c.centered_logask  i.africanamerican#c.centered_logask  ///
i.hispanic#c.centered_logask i.asian#c.centered_logask $controls, cluster(jobid sid)
estimates store OLS15
estadd local hascontrols "X"
estadd local timefe "X"

* Final Salary
ivreg2 logdf_salary i.female i.africanamerican i.hispanic i.asian i.smonthyear, cluster(jobid sid)
estimates store OLS21
estadd local hascontrols ""
estadd local timefe "X"
 
ivreg2 logdf_salary i.female i.africanamerican i.hispanic i.asian $controls, cluster(jobid sid)
estimates store OLS22
estadd local hascontrols "X"
estadd local timefe "X"

ivreg2 logdf_salary i.female i.africanamerican i.hispanic i.asian centered_logask i.smonthyear, cluster(jobid sid)
estimates store OLS23
estadd local hascontrols ""
estadd local timefe "X"

ivreg2 logdf_salary i.female i.africanamerican i.hispanic i.asian centered_logask $controls, cluster(jobid sid)
estimates store OLS24
estadd local hascontrols "X"
estadd local timefe "X"

ivreg2 logdf_salary i.female i.africanamerican i.hispanic i.asian centered_logask ///
i.female#c.centered_logask  i.africanamerican#c.centered_logask  ///
i.hispanic#c.centered_logask i.asian#c.centered_logask $controls, cluster(jobid sid)
estimates store OLS25
estadd local hascontrols "X"
estadd local timefe "X"

esttab 	OLS0_long0 OLS0_long8 OLS11 OLS12 OLS13 OLS14 ///
		OLS15 OLS21 OLS22 OLS23 OLS24 OLS25 using ///
		"$tables/tableE1.tex", $tablebidrace 
}
 

 *----------------------------------------------------------------------------*
 *		--- Table F.1 The Within-Candidate Effect of a Change of the 	      *
 * 					  Ask Salary on the Bid Salary ---		     			  *
 *----------------------------------------------------------------------------*  

{
* Table F.1: within-batch regressions of the log bid salary on the centered
* log ask salary, on the sample of batches in which the ask was updated.
* Output is written to $tables/tableF1.tex.
	
* Load Data Set

$opendata
$dropvars
$timerestrict
	
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'


* time between updates
* Days between consecutive distinct batch start dates of a candidate, and
* their running total (sum() treats missing as zero, so the total is never missing)
bys sid (batch_start_date d_sent_date): gen time_btw_batch =  dofc(batch_start_date[_n]) - dofc(batch_start_date[_n-1]) if batch_start_date[_n] !=batch_start_date[_n-1]
bys sid (batch_start_date d_sent_date): gen tot_time_btw_batch = sum(time_btw_batch)

* We group "spells" into sessions using 90-day windows of cumulative time.
* NOTE(review): an earlier comment called these 6 month sessions, but the
* thresholds are multiples of 90 days - confirm the intended window length.
* Upper bounds are inclusive so that totals of exactly 180, 270, ... days are
* assigned to a session, and the 540-630 window is session 7 (it was
* previously assigned 6 a second time, leaving session 7 unused).
gen session= 1 if tot_time_btw_batch <= 90
replace session= 2 if tot_time_btw_batch > 90 & tot_time_btw_batch <= 180
replace session= 3 if tot_time_btw_batch > 180 & tot_time_btw_batch <= 270
replace session= 4 if tot_time_btw_batch > 270 & tot_time_btw_batch <= 360
replace session= 5 if tot_time_btw_batch > 360 & tot_time_btw_batch <= 450
replace session= 6 if tot_time_btw_batch > 450 & tot_time_btw_batch <= 540
replace session= 7 if tot_time_btw_batch > 540 & tot_time_btw_batch <= 630
replace session= 8 if tot_time_btw_batch > 630 & tot_time_btw_batch <= 720
replace session= 9 if tot_time_btw_batch > 720 & tot_time_btw_batch <= 810
replace session= 10 if tot_time_btw_batch > 810 & tot_time_btw_batch <= 900
replace session= 11 if tot_time_btw_batch > 900 & tot_time_btw_batch <= 990
replace session= 12 if tot_time_btw_batch > 990

* gen the number of updates made by candidates across spells, within a session
* Candidate x session identifier. egen group() gives one id per distinct pair;
* concatenating the two numbers as strings is ambiguous (sid 1 with session 12
* and sid 11 with session 2 both give 112) and loses digits when stored as float.
egen sidyear = group(sid session)
* The first row of each group compares against a missing lag and is flagged as
* an update, hence the -1 below.
bys sidyear (batch_start_date d_sent_date) : gen nb_s_update1 = s_salary[_n] != s_salary[_n-1] 
bys sidyear : egen nb_s_update_sid = total(nb_s_update1) 
replace nb_s_update_sid = nb_s_update_sid - 1 if !mi(nb_s_update_sid)

* gen the number of updates made by candidates within a spell
bys batchid (batch_start_date d_sent_date) : gen nb_s_update2 = s_salary[_n] != s_salary[_n-1]
bys batchid : egen nb_s_update_batchid = total(nb_s_update2) 
replace nb_s_update_batchid = nb_s_update_batchid - 1 if !mi(nb_s_update_batchid)

* total_delta_s measures whether the candidate has updated upwards or downwards
bys sidyear (batch_start_date d_sent_date): gen delta_s = s_salary[_n] - s_salary[_n-1] if nb_s_update1 == 1
replace delta_s = 0 if mi(delta_s) & !mi(s_salary)
bys sidyear (batch_start_date d_sent_date): gen cum_delta_s = sum(delta_s)
bys sidyear (batch_start_date d_sent_date): egen total_delta_s = total(delta_s)
* Carry the last non-zero change forward. replace works through the rows in
* order, so one pass already propagates the value down each group.
gen cum_delta_s_alt = delta_s
bys sidyear (batch_start_date d_sent_date): replace cum_delta_s_alt = cum_delta_s_alt[_n-1] if delta_s[_n] == 0
replace cum_delta_s_alt = 0 if mi(cum_delta_s_alt)

* total_delta_s_batchid measures whether, within a spell, the candidate has updated upwards or downwards
bys batchid (batch_start_date d_sent_date): gen delta_s_batchid = s_salary[_n] - s_salary[_n-1] if nb_s_update2 == 1
replace delta_s_batchid = 0 if mi(delta_s_batchid) & !mi(s_salary)
bys batchid (batch_start_date d_sent_date): gen cum_delta_s_batchid  = sum(delta_s_batchid)
bys batchid (batch_start_date d_sent_date): egen total_delta_s_batchid = total(delta_s_batchid)


label var nb_s_update_sid "number of updates made to s_salary in a given search spell"

* Indicators for at least one update, per candidate-session and per batch
gen update_dummy_sid = 0
replace update_dummy_sid = 1 if nb_s_update_sid > 0 & !mi(nb_s_update_sid)

gen update_dummy_batchid = 0
replace update_dummy_batchid = 1 if nb_s_update_batchid > 0 & !mi(nb_s_update_batchid)

count if nb_s_update_sid > 0
count if nb_s_update_batchid > 0

gen diff_d_s = d_salary - s_salary
sort sid batchid batch_start_date d_sent_date

* Batch id FE: tells you the relationship between logd_salary and logs_salary
* Only batches with at least one ask update are kept from here on
keep if update_dummy_batchid > 0 & !mi(update_dummy_batchid)

* All updaters
reghdfe logd_salary centered_logask $controls, absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_batch
estadd local hascontrols "X"
estadd local timefe "X"

reghdfe logd_salary centered_logask i.female#c.centered_logask $controls, absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_inter_batch
estadd local hascontrols "X"
estadd local timefe "X"

* Net upward updates within the batch
reghdfe logd_salary centered_logask $controls if  total_delta_s_batchid > 0, absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_up_batch
estadd local hascontrols "X"
estadd local timefe "X"

reghdfe logd_salary centered_logask i.female#c.centered_logask $controls if  total_delta_s_batchid > 0 , absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_up_inter_batch
estadd local hascontrols "X"
estadd local timefe "X"

* Net downward or zero updates within the batch
reghdfe logd_salary centered_logask $controls if  total_delta_s_batchid <= 0, absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_down_batch
estadd local hascontrols "X"
estadd local timefe "X"

reghdfe logd_salary centered_logask i.female#c.centered_logask  $controls if  total_delta_s_batchid <= 0 , absorb(batchid) vce(cluster jobid sid)
estimates store IndFE_down_inter_batch
estadd local hascontrols "X"
estadd local timefe "X"

esttab 	IndFE_batch IndFE_inter_batch IndFE_up_batch IndFE_up_inter_batch IndFE_down_batch IndFE_down_inter_batch using "$tables/tableF1.tex", $updaters


}

 
 *----------------------------------------------------------------------------*
 *		--- Table G.1 The Ask Salary as Signal of Quality: Relationship		  *
 *					  between Firm Rank and Residual Log Ask --- 			  *
 *----------------------------------------------------------------------------* 
 
{ 
	
 /*
 This Table uses estimates from a different paper using Hired.com data:
 'Bidding for Talent: Equilibrium Wage Dispersion on a High-Wage Online Job Board'
  with Benjamin Scuderi. The data there is also proprietary. Details on the 
  description of these rankings are provided in Roussille and Scuderi (2023)
 */

* Table G.1: regress the normalized firm rank on gender, with and without the
* centered log ask, first on all bids and then on bids with a final salary.
* Output is written to $tables/tableG1.tex.

* Build the firm ranking: order firms by pi_1 and rescale the position to 0-100
import delimited using "$indata/unique_ranking.csv", c clear
ren cid companyid
drop id

sort pi_1
gen rank = _n

sum rank, d
local max = r(max)

gen norm_rank = (rank/`max')*100

tempfile ranking
save `ranking'

* Load Data Set

$opendata
$dropvars
$timerestrict
	

* Merge with ranking data
* Ranked firms that have no row in the bid data are not kept: they would add
* rows carrying only norm_rank and enter the mean of norm_rank reported below.
merge m:1 companyid using `ranking', keep(master match)

* mean center the log ask salary
sum logs_salary, meanonly
gen centered_logask = logs_salary - `r(mean)'	


* firm rank on bid level 

* Mean of the dependent variable, attached to each stored estimate below
sum norm_rank
scalar mean = r(mean)

ivreg2 norm_rank i.female i.smonthyear, cluster(sid jobid)
estimates store rank_0
	estadd scalar mean
	estadd local hascontrols "" 
	estadd local timefe "X"
	
ivreg2 norm_rank i.female $controls, cluster(sid jobid)
estimates store rank_1
	estadd scalar mean
	estadd local hascontrols "X" 
	estadd local timefe "X"

ivreg2 norm_rank i.female $controls centered_logask, cluster(sid jobid)
estimates store rank_2
	estadd scalar mean
	estadd local hascontrols "X" 
	estadd local timefe "X"

	   
* firm rank on final salary level 
keep if !mi(logdf_salary)
  
* Recompute the mean on the restricted sample
sum norm_rank
scalar mean = r(mean)

ivreg2 norm_rank i.female i.smonthyear, cluster(sid jobid)
estimates store rank_3
	estadd scalar mean
	estadd local hascontrols "" 
	estadd local timefe "X"
	
ivreg2 norm_rank i.female $controls, cluster(sid jobid)
estimates store rank_4
	estadd scalar mean
	estadd local hascontrols "X" 
	estadd local timefe "X"

ivreg2 norm_rank i.female $controls centered_logask, cluster(sid jobid)
estimates store rank_5
	estadd scalar mean
	estadd local hascontrols "X" 
	estadd local timefe "X"
	
esttab rank_0 rank_1 rank_2 rank_3 rank_4 rank_5 using "$tables/tableG1.tex",  $tablerank2 

}
 
 